*     *****************************************************************               * 
*     *****************************************************************               * 
*       File-Name:      getFranceDataset.do                                           *  
*       Date:           2 June 2016                                                   * 
*       Author:         Gschwend                                                      * 
*       Purpose:      	get aggregated data for France					              *
* 	    Input Files:    SURV_FRA_Paris_06_16_2014_SurveyData_cleaned.dta              * 
*						SURV_FRA_NAT_01_05_2013_SurveyData_IdF_cleaned.dta            *
* 						SURV_FRA_IdF_europe_09_22_2014_SurveyData_cleaned.dta         *
* 						SURV_FRA_Marseille_06_16_2014_SurveyData_cleaned.dta          * 
*						SURV_FRA_NAT_01_05_2013_SurveyData_IdF_and_Provence_cleaned.dta *
*						SURV_FRA_Provence_europe_09_22_2014_SurveyData_cleaned.dta    *
*						partycodes.dta, parties_merge.dta (merged in)                 *
*       Data Output:    aggregated_France.dta                                         *              
*     ****************************************************************                * 
*     ****************************************************************                * 





* Session setup: pin the Stata version the script is interpreted under,
* switch off output paging, close a log that may still be open from an
* earlier run (no error if none is open), and empty the data in memory.
version 14.2
set more off
capture log close
clear



**********************************************
* Specify your own local path to the dropbox *
**********************************************


* Thomas G.'s local (office) path:
* local path /Users/gschwend/Dropbox/Andre/OUP multilevel electoral behavior book/chapter 6/replication/


* Thomas G.'s local (Air) path:
* local path /Users/thomasgschwend/Dropbox/Andre/OUP multilevel electoral behavior book/chapter 6/replication/
 
 
 

/*
* generate election-specific id
gen eid =.
replace eid = 111 if ELECID==5  //France, National, IDF (Paris)
replace eid = 112 if ELECID==6  //France, National, Provence
replace eid = 101 if ELECID==22 //France, local, IDF (Paris)
replace eid = 102 if ELECID==23 //France, local, Provence (Marseille)
replace eid = 131 if ELECID==17 //France, Europe, IDF (Paris)
replace eid = 132 if ELECID==16 //France, Europe, Provence
*/


 

*** IDF(Paris), regional level  ->  FranceParisReg.dta
*   Reads the Paris survey, recodes vote intention (Q8A), party identification
*   (pid/spid), the Q17 party items and the importance items, builds the
*   polknow scale, keeps the analysis variables and saves the reduced file.
*   This section also opens Parties_France.log (replacing an existing file).

use "SURV_FRA_Paris_06_16_2014_SurveyData_cleaned.dta", clear


* Constant identifiers for every observation of this survey
generate eid     = 101
generate COUNTRY = "France"
generate LEVEL   = "Regional"
generate ELECID  = "IDF(Paris)"


fre Q8A  Q46A Q46B

* Vote intention (also of those who have already voted).
* The value label is dropped first because the codes are renumbered;
* code 7 stays 7, codes 8 and above become missing.
fre Q8A
label drop Q8A
recode Q8A (1 = 4) (2 = 1) (3 = 6) (4 = 2) (5 = 3) (6 = 8) (8/max = .)

* Party identification: codes 5 and 10 are folded into 2; 9 and 88-99 become
* missing; all other codes are left as they are.
fre Q46A
generate pid = Q46A
recode pid (5 10 = 2) (9 88/99 = .)

generate spid = Q46B
recode spid (9 = .)

* Importance items, renamed by level
generate importance_regional = Q34C
generate importance_national = Q34B
generate importance_european = Q34D


* Gender: 2 -> 1 ("female"), 1 -> 0 ("male"); the variable is then called female
recode GEND (1 = 0) (2 = 1)
label define GEND 0 "male" 1 "female", modify
rename GEND female

fre AGE female

* Code 99 on the Q17 items is treated as missing
recode Q17A Q17B Q17C Q17D Q17E Q17F Q17G Q17H Q17I (99 = .)


* Q17 items by party number. Party 2 takes the row maximum of items B, E and I;
* there is no party 5 in this survey.
generate Q17_party1 = Q17A
egen     Q17_party2 = rowmax(Q17B Q17E Q17I)
generate Q17_party3 = Q17C
generate Q17_party4 = Q17D
generate Q17_party6 = Q17F
generate Q17_party7 = Q17G
generate Q17_party8 = Q17H


* Five 0/1 indicators, each flagging one specific answer code of a Q10 item
* (presumably the correct answer of a knowledge question - confirm against the
* questionnaire); alpha stores the resulting scale in polknow.
generate polknow1 = Q10A == 1
generate polknow2 = Q10B == 2
generate polknow3 = Q10C == 3
generate polknow4 = Q10D == 4
generate polknow5 = Q10E == 5

alpha polknow1 polknow2 polknow3 polknow4 polknow5, detail item generate(polknow)


* Q17_party1 - Q17_party8 is a positional range, so it relies on the
* generation order above.
keep eid COUNTRY LEVEL ELECID Q8A Q17_party1 - Q17_party8 importance_* pid spid polknow AGE female
log using Parties_France.log, replace
*FranceParisReg
fre Q8A Q17*
log off
compress
save "FranceParisReg.dta", replace





*** IDF(Paris), national level  ->  FranceParisNat.dta
*   Same harmonisation as the other survey sections, plus a district-level
*   merge with partycodes.dta that yields the notwasted indicator.

use "SURV_FRA_NAT_01_05_2013_SurveyData_IdF_cleaned.dta", clear


* Constant identifiers for every observation of this survey
generate eid     = 111
generate COUNTRY = "France"
generate LEVEL   = "National"
generate ELECID  = "IDF(Paris)"


fre Q8A  Q46A Q46B

* Vote intention (also of those who have already voted): party codes are kept,
* codes 80 and above become missing.
fre Q8A
label drop Q8A
recode Q8A (80/max = .)

* Party identification: codes 88-99 become missing
fre Q46A
generate pid = Q46A
recode pid (88/99 = .)

generate spid = Q46B
recode spid (9 = .)
fre spid

* Importance items, renamed by level
generate importance_regional = Q34C
generate importance_national = Q34B
generate importance_european = Q34D

* NOTE(review): unlike the other sections there is no GEND recode here, so a
* variable called female must already exist in this input file - confirm.
fre  AGE female


* Code 99 on the Q17 items is treated as missing
recode Q17A Q17B Q17C Q17D Q17E Q17F Q17G Q17H Q17I (99 = .)


* Q17 items A..I become Q17_party1..Q17_party9, in that order
local slot = 1
foreach letter in A B C D E F G H I {
	generate Q17_party`slot' = Q17`letter'
	local ++slot
}


* 0/1 indicators for one specific answer code per Q10 item (presumably the
* correct answers - confirm). A third indicator (Q10C==3) is deliberately left
* out; the original note on it reads "Does not scale".
generate polknow1 = Q10A == 8
generate polknow2 = Q10B == 4
generate polknow4 = Q10D == 6
generate polknow5 = Q10E == 10
alpha polknow1 polknow2 polknow4 polknow5, detail item generate(polknow)

keep eid ELEC_CON COUNTRY LEVEL ELECID Q8A Q17_party* importance_* pid spid polknow AGE female
log on
* FranceParisNat
fre Q8A Q17*
log off

* District-level results -------------------------------------------------
tabulate ELEC_CON

* Build the merge key "<name> <number>" from the ELEC_CON value label, which is
* split at " - ": the first piece is the name, the number is taken from the
* second piece.
* NOTE(review): "[0-9]*" also matches an empty string, so con_number is empty
* whenever the second piece does not start with a digit - confirm intended.
decode ELEC_CON, generate(con)
split con, parse(" - ")
generate con_number = regexs(0) if regexm(con2, "[0-9]*")
generate con_name = trim(con1)

generate caseid = _n

generate district_name = con_name + " " + con_number

* Consistency check 0: 966 cases numbered 1..966
summarize caseid
return list
assert r(N)==966  & r(min)==1 & r(max)==966 & r(sum)==467061

sort district_name caseid



merge m:1 district_name using "partycodes.dta"
tabulate _merge

* Which districts are not covered? Paris 14 was won in 1st round. Not in sample.
tabulate district_name if _merge==1
tabulate district_name if _merge==2

* Rows that exist only in partycodes.dta carry no respondent
drop if _merge==2
drop _merge

* Consistency check 1: the merge must neither add nor lose respondents
summarize caseid
return list
assert r(N)==966  & r(min)==1 & r(max)==966 & r(sum)==467061


* notwasted = 1 if the vote intention equals party1, party2 or party3 (the
* variables dropped just below, i.e. taken from partycodes.dta), 0 otherwise,
* and missing where Q8A is missing.
generate notwasted = (Q8A == party1 | Q8A == party2 | Q8A == party3) if Q8A != .


drop party1 party2 party3 departementcode departementname ELEC_CON



* Consistency check 2
summarize caseid
return list
assert r(N)==966  & r(min)==1 & r(max)==966 & r(sum)==467061
drop caseid

compress
save "FranceParisNat.dta", replace




*** IDF(Paris), European level  ->  FranceParisEU.dta
*   Recodes vote intention, party identification (Q47A/Q47B in this survey),
*   gender, the Q17 party items and the importance items, builds the polknow
*   scale and saves the reduced file.

use "SURV_FRA_IdF_europe_09_22_2014_SurveyData_cleaned.dta", clear

* Constant identifiers for every observation of this survey
generate eid     = 131
generate COUNTRY = "France"
generate LEVEL   = "European"
generate ELECID  = "IDF(Paris)"

fre Q8A  Q47A

* Vote intention: renumber the party codes (5, 6 and 9 keep their code),
* codes 80 and above become missing.
fre Q8A
label drop Q8A
recode Q8A (1 = 4) (2 = 1) (3 = 2) (4 = 3) (7 = 8) (8 = 7) (80/max = .)



* Party identification: codes 88-99 become missing
fre Q47A
generate pid = Q47A
recode pid (88/99 = .)


fre Q47B
generate spid = Q47B
recode spid (9 = .)
fre spid


* Importance items, renamed by level
fre Q34*
generate importance_regional = Q34C
generate importance_national = Q34B
generate importance_european = Q34D

* Gender: 2 -> 1 ("female"), 1 -> 0 ("male"); the variable is then called female
recode GEND (1 = 0) (2 = 1)
label define GEND 0 "male" 1 "female", modify
rename GEND female

fre  AGE female


* Code 99 on the Q17 items is treated as missing
recode Q17A Q17B Q17C Q17D Q17E Q17F Q17G Q17H Q17I (99 = .)

* Q17 items A..I become Q17_party1..Q17_party9, in that order
local slot = 1
foreach letter in A B C D E F G H I {
	generate Q17_party`slot' = Q17`letter'
	local ++slot
}


* An earlier, disabled answer key is kept here for the record; the original
* note on it reads "This looks fishy":
*     polknow1 = (Q10A==8)
*     polknow2 = (Q10B==4)
*     polknow3 = (Q10C==3)    Does not scale
*     polknow4 = (Q10D==10)
*     polknow5 = (Q10E==5)    Does not scale


* Key in use: four 0/1 indicators (there is no polknow1 in this section)
generate polknow2 = Q10B == 7
generate polknow3 = Q10C == 3
generate polknow4 = Q10D == 9
generate polknow5 = Q10E == 5

alpha polknow2 polknow3 polknow4 polknow5, detail item generate(polknow)



keep eid COUNTRY LEVEL ELECID Q8A Q17_party* importance_* pid spid polknow AGE female
log on
* FranceParisEU
fre Q8A Q17*
log off

compress
save "FranceParisEU.dta", replace





*** Save variables for Provence at the regional level (Marseille)
*   Recodes vote intention, party identification, gender, the Q17 party items
*   and the importance items, builds the polknow scale and saves the reduced
*   file as FranceProvenceReg.dta.

use "SURV_FRA_Marseille_06_16_2014_SurveyData_cleaned.dta", clear 

* Constant identifiers for every observation of this survey
gen eid = 102
gen COUNTRY = "France"
gen LEVEL = "Regional"
gen ELECID = "Provence"


fre Q8A Q8B Q46A Q46B

* Drop the original value label before renumbering the codes, as every other
* survey section of this file does. Otherwise the logged -fre Q8A- below shows
* the original party names against the recoded numbers. -capture- keeps the
* script running if this file has no value label called Q8A.
capture label drop Q8A

* Vote intention: where Q8A is 99 the answer to Q8B is used instead
replace Q8A = Q8B if Q8A==99
recode Q8A 1=4 2=1 3=2 4=3 6=8  7=.   80/max=. /* No party like/dislike scores for La liste Changer la donne */



* Party identification: code 6 is folded into 1 and code 10 into 2;
* 5, 7, 9 and everything from 80 upwards become missing.
fre Q46A
gen pid = Q46A
recode pid 1=1 2=2 3=3 5=. 6=1 7=. 8=8 9=. 10=2   80/max=. /* For MODEM &  Nouveau Parti Anticapitaliste no vote intention */


fre Q46B
gen spid = Q46B
recode spid 9=.
fre spid


* Importance items, renamed by level
fre Q34*
gen importance_regional = Q34C
gen importance_national = Q34B
gen importance_european = Q34D

* Gender: 2 -> 1 ("female"), 1 -> 0 ("male"); the variable is then called female
recode GEND (2 = 1) (1 = 0)
label define GEND 1 "female" 0 "male", modify
rename GEND female

fre  AGE female

* Code 99 on the Q17 items is treated as missing
local levels  "A B C D E F G H I" 
foreach lev in `levels' {
	recode Q17`lev' 99 = .
} 

* Q17 items by party number. Party 1 is the row maximum of items A and F,
* party 2 the row maximum of items B and I; parties 5 and 7 are left out.
egen Q17_party1 = rowmax(Q17A Q17F)  
egen Q17_party2 = rowmax(Q17B Q17I)
gen Q17_party3 = Q17C  
gen Q17_party4 = Q17D
*gen Q17_party5 = Q17E  /* No Vote intention */
*gen Q17_party7 = Q17G  /* No Vote intention */
gen Q17_party8 = Q17H                  


* 0/1 indicators for one specific answer code per Q10 item (presumably the
* correct answers - confirm); alpha stores the resulting scale in polknow.
gen polknow1 = (Q10A==1)
gen polknow2 = (Q10B==2)
gen polknow3 = (Q10C==3)
gen polknow4 = (Q10D==4)
gen polknow5 = (Q10E==5)
alpha polknow1  polknow2 polknow3 polknow4 polknow5, detail item gen(polknow)

* Q10A-Q10E are kept in this file, unlike in the Paris regional section
keep eid COUNTRY LEVEL ELECID  Q8A Q17_party* importance_* pid spid Q10A-Q10E AGE female polknow
log on
*FranceProvenceReg
fre Q8A Q17*
log off

compress
save "FranceProvenceReg.dta", replace






*** Provence, national level  ->  FranceProvenceNat.dta
*   Uses the combined IdF/Provence file restricted to REGION == 1, applies the
*   same harmonisation as the other sections, and merges partycodes.dta at
*   district level to build the notwasted indicator.


use "SURV_FRA_NAT_01_05_2013_SurveyData_IdF_and_Provence_cleaned.dta", clear 

keep if REGION == 1

* Constant identifiers for every observation of this survey
generate eid     = 112
generate COUNTRY = "France"
generate LEVEL   = "National"
generate ELECID  = "Provence"

fre Q8A Q8B Q46A Q46B

* Vote intention: where Q8A is 99 the answer to Q8B is used instead;
* codes 80 and above become missing.
label drop Q8A
replace Q8A = Q8B if Q8A==99
recode Q8A (80/max = .)

* Gender is only renamed here; it is recoded after the keep further down
rename GEND gend


* Party identification: codes 80 and above become missing
fre Q46A
generate pid = Q46A
recode pid (80/max = .)


fre Q46B
generate spid = Q46B
recode spid (9 = .)
fre spid


* Importance items, renamed by level
fre Q34*
generate importance_regional = Q34C
generate importance_national = Q34B
generate importance_european = Q34D



* Code 99 on the Q17 items is treated as missing
recode Q17A Q17B Q17C Q17D Q17E Q17F Q17G Q17H Q17I (99 = .)

* Q17 items A..I become Q17_party1..Q17_party9, in that order.
* For party 9: assuming that Q17I is wrongly labeled and should refer to
* Nouveau Centre.
local slot = 1
foreach letter in A B C D E F G H I {
	generate Q17_party`slot' = Q17`letter'
	local ++slot
}


* 0/1 indicators for one specific answer code per Q10 item (presumably the
* correct answers - confirm); alpha stores the resulting scale in polknow.
generate polknow1 = Q10A == 8
generate polknow2 = Q10B == 4
generate polknow3 = Q10C == 2
generate polknow4 = Q10D == 6
generate polknow5 = Q10E == 10
alpha polknow1 polknow2 polknow3 polknow4 polknow5, detail item generate(polknow)

keep eid ELEC_CON COUNTRY LEVEL ELECID Q8A Q17_party* importance_* pid spid Q10A-Q10E AGE gend polknow

log on
* *FranceProvenceNat
fre Q8A Q17*
log off

* Gender: 2 -> 1 ("female"), 1 -> 0 ("male"); the variable is then called female
recode gend (1 = 0) (2 = 1)
label define gend 0 "male" 1 "female", modify
rename gend female


* District-level results -------------------------------------------------
tabulate ELEC_CON

* Build the merge key "<name> <number>" from the ELEC_CON value label, which is
* split at " - ": the first piece is the name, the number is taken from the
* second piece.
* NOTE(review): "[0-9]*" also matches an empty string, so con_number is empty
* whenever the second piece does not start with a digit - confirm intended.
decode ELEC_CON, generate(con)
split con, parse(" - ")
generate con_number = regexs(0) if regexm(con2, "[0-9]*")
generate con_name = trim(con1)

generate district_name = con_name + " " + con_number

generate caseid = _n

* Consistency check 4: 983 cases numbered 1..983
* (if IDF is not dropped the figures would be N = 1949, sum = 1900275)
summarize caseid
return list
assert r(N)==983  & r(min)==1 & r(max)==983 & r(sum)==483636

* Needed for merge to be replicable
sort district_name caseid

merge m:1 district_name using "partycodes.dta"
tabulate _merge


* Which districts are not covered? ALPES MARITIMES 6 & 7 were won in 1st round. Handcoding!
tabulate district_name if _merge==1


* Rows that exist only in partycodes.dta carry no respondent
drop if _merge==2
drop _merge





* notwasted = 1 if the vote intention equals party1, party2 or party3 (the
* variables dropped further down, i.e. taken from partycodes.dta), 0 otherwise,
* and missing where Q8A is missing.
generate notwasted = (Q8A == party1 | Q8A == party2 | Q8A == party3) if Q8A != .


* Districts won in first round, coded by hand:
*                        party1   party2   party3
*   ALPES MARITIMES 6    SOC1     UMP2     FN3
*   ALPES MARITIMES 7    UMP2     FN3
* notwasted1 flags Q8A in {1,2,3} for district 6 and Q8A in {2,3} for district 7.

generate notwasted1 = 1 if (inlist(Q8A, 1, 2, 3) & district_name == "ALPES MARITIMES 6") | (inlist(Q8A, 2, 3) & district_name == "ALPES MARITIMES 7")

replace notwasted = 1 if notwasted1 == 1


drop party1 party2 party3 departementcode departementname ELEC_CON notwasted1

* Consistency check 5: the merge must neither add nor lose respondents
* (if IDF is not dropped the figures would be N = 1949, sum = 1900275)
summarize caseid
return list
assert r(N)==983  & r(min)==1 & r(max)==983 & r(sum)==483636
drop caseid

compress
save "FranceProvenceNat.dta", replace





*** Provence, European level  ->  FranceProvenceEU.dta
*   Recodes vote intention, party identification (Q47A/Q47B in this survey),
*   gender, the Q17 party items and the importance items, builds the polknow
*   scale and saves the reduced file. The log is closed in this section.

use "SURV_FRA_Provence_europe_09_22_2014_SurveyData_cleaned.dta", clear 

* Constant identifiers for every observation of this survey
generate eid     = 132
generate COUNTRY = "France"
generate LEVEL   = "European"
generate ELECID  = "Provence"


* Vote intention: where Q8A is 99 the answer to Q8B is used instead
fre Q8A
label drop Q8A
replace Q8A = Q8B if Q8A==99

* Renumber the party codes (5 and 6 keep their code); 80 and above become missing
recode Q8A (1 = 4) (2 = 1) (3 = 2) (4 = 3) (7 = 8) (8 = 9) (80/max = .)
fre Q8A

* Party identification. Original note: Lutte Ouvrière not included!
fre Q47*
generate pid = Q47A
generate spid = Q47B
recode pid (80/max = .)
recode spid (9 = .)
fre *pid

* Importance items, renamed by level
generate importance_regional = Q34C
generate importance_national = Q34B
generate importance_european = Q34D


* Gender: 2 -> 1 ("female"), 1 -> 0 ("male"); the variable is then called female
recode GEND (1 = 0) (2 = 1)
label define GEND 0 "male" 1 "female", modify
rename GEND female

fre AGE female

* Code 99 on the Q17 items is treated as missing
recode Q17A Q17B Q17C Q17D Q17E Q17F Q17G Q17H Q17I (99 = .)


* Q17 items by party number; party 7 (item G) is left out because there is
* no respective vote intention.
generate Q17_party1 = Q17A
generate Q17_party2 = Q17B
generate Q17_party3 = Q17C
generate Q17_party4 = Q17D
generate Q17_party5 = Q17E
generate Q17_party6 = Q17F
generate Q17_party8 = Q17H
generate Q17_party9 = Q17I


* Four 0/1 indicators for one specific answer code per Q10 item (presumably
* the correct answers - confirm); there is no polknow1 in this section.
generate polknow2 = Q10B == 7
generate polknow3 = Q10C == 9
generate polknow4 = Q10D == 3
generate polknow5 = Q10E == 1

alpha polknow2 polknow3 polknow4 polknow5, detail item generate(polknow)


keep eid COUNTRY LEVEL ELECID Q8A Q17_party* importance_* pid spid polknow AGE female Q10*
log on
* *FranceProvenceEU
fre Q8A Q17_party*
log close

compress
save "FranceProvenceEU.dta", replace





* Append the five listed survey files to the data currently in memory and
* delete each of them afterwards.
* NOTE(review): FranceProvenceEU is not in the list, so it has to be the data
* in memory at this point, and FranceProvenceEU.dta is never erased - confirm
* that both are intended.
foreach part in FranceParisReg FranceParisNat FranceParisEU FranceProvenceReg FranceProvenceNat {
	append using `part'
	erase "`part'.dta"
}


* Running case number over the pooled file
generate caseid = _n




* Check data consistency 6: 5896 pooled cases numbered 1..5896
* (an earlier version of this check expected N = 6862, sum = 23546953)
summarize caseid
return list
assert r(N)== 5896  & r(min)==1 & r(max)==5896 & r(sum)==  17384356

drop Q10*



rename AGE age

* Generic value labels for the harmonised party numbers; codes of 80 and
* above are set to missing.
label define Q8A 1 "party 1" 2 "party 2" 3 "party 3" 4 "party 4" 5 "party 5" 6 "party 6" 7 "party 7" 8 "party 8" 9 "party 9", modify
fre Q8A
recode Q8A (80/max = .)
fre Q8A


* Check whether coding is correct (note: bysort re-sorts the data by ELECID)
bysort ELECID: tabulate eid LEVEL

* Create id to merge in party information for some elections:
* election id times 100 plus party number
generate id = eid*100 + Q8A


* Check data consistency 7
* (an earlier version of this check expected N = 6862, sum = 23546953)
summarize caseid
return list
assert r(N)== 5896  & r(min)==1 & r(max)==5896 & r(sum)==  17384356
sort id caseid

merge m:1 id using "parties_merge.dta"

* Merge result recorded by the author for an earlier version of the data:
*     not matched                         1,801
*         from master                     1,717  (_merge==1)
*         from using                         84  (_merge==2)
*     matched                             5,145  (_merge==3)


* Only respondents with matching party information are kept
tabulate _merge
keep if _merge==3

* Check data consistency 8: 4368 matched cases remain
* (an earlier version of this check expected N = 5145, max = 6862, sum = 17960137)
summarize caseid
return list
assert r(N)== 4368  & r(min)==1 & r(max)==5896 & r(sum)== 12990853
drop _merge



******* get district-wide results in for non-national elections



* Indicator for wasted vote: 1 where the seat variable equals 0.
* seat_current and seat_previous are not created in this file; presumably
* they come from parties_merge.dta - confirm.
generate seat_current0 = seat_current == 0
generate seat_previous0 = seat_previous == 0
* Get in district level results: France national
replace seat_current0 = 1 if notwasted == 0

* Numeric district identifier.
* NOTE(review): the first encoded category is set to missing - presumably a
* blank district name; confirm.
encode district_name, generate(district)
fre district
recode district (1 = .)




* Check data consistency 9
* (an earlier version of this check expected N = 5145, max = 6862, sum = 17960137)
summarize caseid
return list
assert r(N)== 4368  & r(min)==1 & r(max)==5896 & r(sum)== 12990853
drop caseid

* Pin the number of cases flagged by each indicator
summarize seat_current0
return list
assert r(N)== 4368  & r(min)==0 & r(max)==1 & r(sum)== 1107

summarize seat_previous0
return list
assert r(N)== 4368  & r(min)==0 & r(max)==1 & r(sum)== 709


compress
save "aggregated_France.dta", replace






